import scrapy
from selenium import webdriver

class WangyiSpider(scrapy.Spider):
    """Crawl NetEase News (news.163.com): category pages -> article lists -> article bodies.

    A headless Selenium Chrome driver is created alongside the spider (the
    site renders parts of its listings with JavaScript); it is shut down in
    :meth:`close` when the spider finishes.
    """

    name = "wangyi"
    # allowed_domains = ["news.163.com"]  # restrict crawling to this site if re-enabled
    start_urls = ["http://news.163.com"]  # entry point for the first request

    # Indices into the "index_head" nav links that correspond to the news
    # categories we want (the other entries are non-category links).
    CATEGORY_INDICES = (2, 3, 4, 5)

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        options = webdriver.ChromeOptions()
        options.add_argument('--headless')
        options.add_argument('--allow-insecure-localhost')
        options.add_argument('--disable-web-security')
        self.driver = webdriver.Chrome(options=options)

    def parse(self, response):
        """Extract the selected category links from the homepage nav bar
        and schedule one request per category."""
        nav_links = response.xpath('//div[@class="index_head"]//li/a')
        for i in self.CATEGORY_INDICES:
            link = nav_links[i].xpath('./@href').extract_first()
            # Guard: a missing href would make scrapy.Request raise ValueError.
            if link:
                yield scrapy.Request(url=link, callback=self.parse_one)

    def parse_one(self, response):
        """Extract title/image for each article on a category page, then
        schedule a request for the article body."""
        div_list = response.xpath('//div[@class="ndi_main"]/div')
        for div in div_list:
            url = div.xpath('.//a[@class="na_pic"]/@href').extract_first()
            title = div.xpath('.//div/div[@class="news_title"]/h3/a/text()').extract_first()
            img_url = div.xpath('.//a[@class="na_pic"]/img/@src').extract_first()
            yield {
                'title': title,
                'img_url': img_url
            }
            # Some list entries lack the "na_pic" anchor; skip them instead
            # of crashing on a None URL.
            if url:
                yield scrapy.Request(url=url, callback=self.parse_two)

    def parse_two(self, response):
        """Extract the article body text from the detail page."""
        content_list = response.xpath('//div[@class="post_body"]//p/text()').extract()
        # join() avoids the quadratic string-concatenation loop.
        content = ''.join(value.strip() for value in content_list)
        yield {
            'content': content
        }

    def close(self, spider):
        """Called by Scrapy when the spider finishes; release the browser.

        quit() (not close()) terminates the whole ChromeDriver process;
        close() only closes the current window and would leak the driver.
        """
        self.driver.quit()


